//
// Created by song on 16-11-18.
//

#ifndef C0COMPILER_LEXER_H
#define C0COMPILER_LEXER_H


#include <vector>
#include <iostream>
#include <set>
#include <fstream>
#include "Token.h"
#include "../helper/Utils.h"
#include "../helper/Error.h"

class Lexer{

public:
    enum STATE { // this is the state of automate machine of lexer.
        BLANK, // tab space \n \r
        IDENTIFIER, // _ a-z A-Z
        OPERATOR, // + - * / < <= > >= != == , (  ) { } [ ] ; : =
        STRING, // "string"
        CHAR, // 'c'
        COMMENT,
        NUMBER // 0-9
    };

    vector<int> lineBreak;
    set<char> operatorFIRSTSet;
    set<char> charContent;
    set<char> identifierFIRSTSet;
    set<char> identifierContent;
    set<string> keywords;

    Lexer(){
        Token::init();
        lineBreak.push_back(-1);
        operatorFIRSTSet = Utils::string2set("+-*/()[]{},:;><=!");
        charContent = Utils::string2set("_+-*/abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789");
        identifierFIRSTSet = Utils::string2set("_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
        identifierContent = Utils::string2set("_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789");
        keywords.insert("const");
        keywords.insert("int");
        keywords.insert("char");
        keywords.insert("void");
        keywords.insert("if");
        keywords.insert("else");
        keywords.insert("while");
        keywords.insert("switch");
        keywords.insert("case");
        keywords.insert("default");
        keywords.insert("return");
        keywords.insert("printf");
        keywords.insert("scanf");

    }

    /*
     * this function must be called at the right status.
     * I mean at the end of pre-status. If you call it middle way, for example:
     * "abc'd" if you call beginOf(4) it still return CHAR.
     */
    STATE beginOf(int i) {
        char curChar = source[i];
        if('0'<=curChar && curChar<='9'){
            return NUMBER;
        }else if(identifierFIRSTSet.count(curChar)){
            return IDENTIFIER;
        }else if((curChar=='/')&&(source[i+1]=='/')){
            return COMMENT;
        }else if(operatorFIRSTSet.count(curChar)){
            return OPERATOR;
        }else if(curChar==39){
            return CHAR;
        }else if((curChar=='"')){
            return STRING;
        }else{
            return BLANK;
        }
    }

    void pushToken(STATE state, int start, int len) {
        string content(source, (unsigned long) start, (unsigned long) len);
//        cout<<content<<endl;
        TokenType t;
        switch(state){
            case OPERATOR:
                switch(source[start]){
                    case '>':(len==2 ? t=GreatEqual: t=GreatThan);break;
                    case '<':(len==2 ? t=LessEqual : t=LessThan); break;
                    case '=':(len==2 ? t=Equal     : t=Assign);   break;
                    case '!':t=NotEqual; break;
                    case ',':t=Comma; break;
                    case ':':t=Colon; break;
                    case ';':t=Semicolon; break;
                    case '+':t=Add;   break;
                    case '-':t=Minus; break;
                    case '*':t=Mul;   break;
                    case '/':t=Div;   break;
                    case '(':t=LeftR; break;
                    case ')':t=RightR;break;
                    case '[':t=LeftS; break;
                    case ']':t=RightS;break;
                    case '{':t=LeftB; break;
                    case '}':t=RightB;break;
                    default:
                        Error::nextErrorDetail<< "Not a valid OPERATOR, found " << source[start] << " at " <<__FILE__ << __LINE__;
                        Error::internal(Error::Should_Not_Happen);
                }
                break;
            case IDENTIFIER:
                if(keywords.count(content)){//char const case default int if while printf return switch scanf void
                    switch(source[start]){
                        case 'c':
                            switch(source[start+1]){
                                case 'a':t=Case;break;
                                case 'h':t=Char;break;
                                case 'o':t=Const;break;
                                default:
                                    Error::nextErrorDetail<< "Not a valid IDENTIFIER, found " << source[start] << " at " <<__FILE__ << __LINE__;
                                    Error::internal(Error::Should_Not_Happen);
                            }
                            break;
                        case 'd': t=Default;break;
                        case 'i': (source[start+1]=='n' ? t=Int : t=If);break;
                        case 'w': t=While;break;
                        case 'p': t=Printf;break;
                        case 'r': t=Return;break;
                        case 's': (source[start+1]=='w' ? t=Switch : t=Scanf);break;
                        case 'v': t=Void;break;
                        case 'e': t=Else;break;
                        default:
                            Error::nextErrorDetail<< "Not a valid IDENTIFIER, found " << state << " at " <<__FILE__ << __LINE__;
                            Error::internal(Error::Should_Not_Happen);
                    }
                }else{
                    t=Id;
                }
                break;
            case NUMBER:
                t=Num;
                break;
            case STRING:
                t=StringLiteral;
                break;
            case CHAR:
                t=CharLiteral;
                break;
            default:
                Error::nextErrorDetail<< "Not a valid state when pushToken, found " << source[start] << " at " <<__FILE__ << __LINE__;
                Error::internal(Error::Should_Not_Happen);
        }
        tokenList.push_back(new Token(t, content, start, len));
    }

    void parse(){
        char c;
        int i=0;
        int len;//token length;
        while(i<fileSize){
            len=0;
            switch(beginOf(i)){
                case IDENTIFIER:
                    c=source[i];
                    while(identifierContent.count(c)>0){
                        len++;
                        if(i+len<fileSize){
                            c =source[i+len];
                        }else{
                            len--;
                            break;
                        }
                    }
                    pushToken(IDENTIFIER, i, len);
                    i+=len;
                    break;
                case OPERATOR:
                    switch (source[i]){
                        case '<':
                        case '>':
                        case '=':
                            if(source[i+1]=='='){
                                pushToken(OPERATOR, i, 2);
                                i+=2;
                            }else{
                                pushToken(OPERATOR, i, 1);
                                i+=1;
                            }
                            break;
                        case '!':
                            if(source[i+1]=='='){
                                pushToken(OPERATOR, i, 2);
                                i+=2;
                            }else{
                                Error::syntax(i+1, Error::Expect_Not_Equal);
                            }
                            break;
                        default:
                            pushToken(OPERATOR, i, 1);
                            i+=1;
                    }
                    break;
                case NUMBER:
                    c=source[i];
                    while('0'<= c && c<= '9'){
                        len++;
                        if(i+len<fileSize){
                            c=source[i+len];
                        }else{
                            len--;
                            break;
                        }
                    }
                    pushToken(NUMBER, i, len);
                    i+=len;
                    break;
                case STRING:
                    len++;
                    while(i+len<fileSize){
                        char curChar = source[i+len];
                        if(curChar!=34){ // not end
                            if(32<=curChar && curChar<=126){
                                len++;
                            }else{
                                Error::syntax(i+len, Error::Invalid_String_Content);
                            }
                        }else{ // end "
                            len++;
                            pushToken(STRING, i, len);
                            break;
                        }
                    }
                    if(i==fileSize){
                        Error::syntax(i, Error::Invalid_String_End);//"reach EOF. string not end with \"");
                    }
                    i+=len;
                    break;
                case CHAR:
                    //source[i]=='
                    if(i+2>=fileSize){
                        Error::syntax(i, Error::Invalid_Char_Const_End);//("reach EOF. char not end with '");
                    }
                    if(charContent.count(source[i+1])>0){
                        if(source[i+2]=='\''){
                            pushToken(CHAR, i, 3);
                        }else{
                            Error::syntax(i+2, Error::Invalid_Char_Const_End);
                        }
                    }else{
                        Error::syntax(i+1, Error::Invalid_Char_Content);
                    }
                    i+=3;
                    break;
                case BLANK:
                    if(source[i]=='\n'){
                        lineBreak.push_back(i);
                    }
                    i++;
                    break;

                case COMMENT:
                    while(source[i]!='\n' && i<fileSize){
                        i++;
                    }
                    break;

            }


        }
        tokenList.push_back(new Token(EndOfFile, "EOF", (int) fileSize, 0));
        lineBreak.push_back((const int &) fileSize);
    }

    int recurCompare(int index, int l, int r){
        int mid=(l+r)/2;
        if(mid==l){
            return mid;
        }else{
            if(lineBreak[mid]<=index){
                return recurCompare(index, mid,r);
            }else{
                return recurCompare(index, l,mid);
            }
        }
    }

    string charIndex2pos(int index){
        int line=recurCompare(index, 0, (int) (lineBreak.size() - 1));
        int column =index-lineBreak[line];
        stringstream result;
        result<<"line "<<line+1<<" column "<<column;
        return result.str();
    }

    int nextLineBreakPos(int curPos){
        unsigned long length = lineBreak.size();
        for(int i=0; i<length; i++){
            if(lineBreak[i]>curPos) return lineBreak[i];
        }
        if(length>0){
            return lineBreak[length-1];
        }else{
            Error::nextErrorDetail << "Lexer.lineBreak is empty";
            Error::syntax(curPos, Error::Should_Not_Happen);
        }
    }

    void readSourceFile(string filename){
        cout << "source file: "<<filename;
        ifstream inputStream;
        inputStream.open(filename.c_str());
        if(!inputStream.is_open()){
            Error::compilerError(Error::Can_Not_Open_File);
        }
        inputStream.seekg(0, ios::end);
        long fs = inputStream.tellg();
        this->fileSize = fs;
        if(fs <= MAX_SOURCE_FILE_SIZE){
            if(fs>=1024){
                cout<<" ("<<(fs/1024)<<" KB)"<<endl;
            }else{
                cout<<" ("<<fs<<" Bytes)"<<endl;
            }
            inputStream.seekg( 0, ios::beg );
            inputStream.read( this->source, fs );
        }else{
            Error::compilerError(Error::Source_File_Too_Large);
        }
        inputStream.close();
    }

    void printSource(){
        int lineNo = 1;
        char strLineNo[7];
        sprintf(strLineNo, "% 4d  ", lineNo);
        cout << strLineNo ;
        for(int i=0; i < this->fileSize; i++){
            if(source[i]=='\n'){
                lineNo++;
                sprintf(strLineNo, "% 4d  ", lineNo);
                cout<< this->source[i] <<strLineNo;
            }else{
                cout<< this->source[i];
            }
        }
        cout<<endl;
    }

    void printTokens(){
        vector<Token *>::const_iterator token = tokenList.begin();
        while(token!= tokenList.end()){
            cout<<(*token)->toString()<<endl;
            token++;
        }
    }

    vector<Token*> getTokenList() {
        return tokenList;
    }

    string pointPos(int index) {
        stringstream r;
        int line=recurCompare(index, 0, (int) (lineBreak.size() - 1));
        int fromLine = line-3;
        int toLine = line+3;
        if(fromLine<0){
            fromLine=0;
        }
        if(toLine>=lineBreak.size()){
            toLine = (int) (lineBreak.size() - 1);
        }
        r<<"======================================"<<endl;
        char strLineNo[7];
        for(int lineNo = fromLine; lineNo<toLine; lineNo++){
            sprintf(strLineNo, "% 4d  ", lineNo+1);
            r << strLineNo ;
            for(int i=lineBreak[lineNo]+1; i < lineBreak[lineNo+1]; i++){
                if(source[i]=='\t'){
                    r<< ' ';
                }else{
                    r<< this->source[i];
                }
            }
            r<<endl;
            if(lineNo == line){
                int column =index-lineBreak[line];
                r<<"error ";
                for(int i=0;i<column-1;i++){
                    r<<'-';
                }
                r<<'^'<<endl;
            }
        }

        r<<"======================================"<<endl;
        return r.str();
    }

    void readSourceFileFromStdIn() {
        cout << "input source file from std::cin"<< endl;
        char c;
        int i=0;
        while(std::cin.get(c)){
            source[i] = c;
            i++;
        }
//        cout << source <<endl;
    }

    string pointPos(int start, int end) {
        stringstream r;
        int line=recurCompare(start, 0, (int) (lineBreak.size() - 1));
        int fromLine = line-3;
        int toLine = line+3;
        if(fromLine<0){
            fromLine=0;
        }
        if(toLine>=lineBreak.size()){
            toLine = (int) (lineBreak.size() - 1);
        }
        r<<"======================================"<<endl;
        char strLineNo[7];
        for(int lineNo = fromLine; lineNo<toLine; lineNo++){
            sprintf(strLineNo, "% 4d  ", lineNo+1);
            r << strLineNo ;
            for(int i=lineBreak[lineNo]+1; i < lineBreak[lineNo+1]; i++){
                if(source[i]=='\t'){
                    r<< ' ';
                }else{
                    r<< this->source[i];
                }
            }
            r<<endl;
            if(lineNo == line){
                int column = start - lineBreak[line];
                r<<"error ";
                for(int i=0;i<column-1;i++){
                    r<<'-';
                }
                for(int i=0; i < end-start; i++){
                    r<<'^';
                }
                r<<endl;
            }
        }

        r<<"======================================"<<endl;
        return r.str();
    }

//private:
    long fileSize;
    char source[MAX_SOURCE_FILE_SIZE];
    vector<Token*> tokenList;
};


#endif //C0COMPILER_LEXER_H
